﻿<!DOCTYPE html>
<!-- Document language is declared so assistive technology and translation tools treat the content as Chinese. -->
<html lang="zh-CN">
<head>
    <meta charset="utf-8">
    <title>爬虫增删改查</title>
    <!-- Zoom stays enabled: maximum-scale=1 / user-scalable=no stop users from enlarging the page. -->
    <meta name="viewport" content="width=device-width,initial-scale=1,minimum-scale=1">
    <link rel="stylesheet" type="text/css" href="http://hoppinzq.com/spider/css/style.css">
    <link rel="stylesheet" type="text/css" href="http://hoppinzq.com/spider/css/footer.css">
    <link rel="stylesheet" href="http://hoppinzq.com/spider/css/simple-bar.css">
    <link rel="stylesheet" href="http://hoppinzq.com/spider/css/main.css">
    <link rel="stylesheet" type="text/css" href="http://hoppinzq.com/spider/css/bootstrap.min.css">
    <link rel="stylesheet" href="http://hoppinzq.com/wyy/static/css/style.css">

    <!-- Page-local override. It lives inside <head>, after the linked stylesheets, because a <style>
         element is not allowed between </head> and <body>; the cascade order is unchanged. -->
    <style>
        .radio-col-primary {
            height: 15px !important;
        }
    </style>
</head>
<body>
<!-- Site header: logo, collapsible toggler and the primary navigation links. -->
<header class="header navbar-area others-pages">
    <div class="container">
        <div class="row align-items-center">
            <div class="col-lg-12">
                <div class="nav-inner">

                    <nav class="navbar navbar-expand-lg">
                        <a class="navbar-brand" href="">
                            <img src="http://hoppinzq.com/static/images/logo/1640338711_113639.png" alt="Logo">
                        </a>
                        <!-- The toggler has no text content, so aria-label is its accessible name. -->
                        <button class="navbar-toggler mobile-menu-btn" type="button" data-bs-toggle="collapse"
                                data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent"
                                aria-expanded="false" aria-label="Toggle navigation">
                            <span class="toggler-icon"></span>
                            <span class="toggler-icon"></span>
                            <span class="toggler-icon"></span>
                        </button>
                        <div class="collapse navbar-collapse sub-menu-bar" id="navbarSupportedContent">
                            <!-- The links are named by their own text; the "Toggle navigation" aria-label
                                 they used to carry replaced that text for screen readers. -->
                            <ul id="nav" class="navbar-nav ms-auto">
                                <li class="nav-item">
                                    <a href="index.html">首页</a>
                                </li>
                                <li class="nav-item">
                                    <a href="documentation.html">文档</a>
                                </li>
                                <li class="nav-item">
                                    <a href="contact.html">建议</a>
                                </li>
                            </ul>
                        </div>

                    </nav>

                </div>
            </div>
        </div>
    </div>
</header>
<div class="box">
    <!-- Visible page caption. -->
    <div class="title pctitle">配置您的爬虫</div>
    <!-- Empty in the markup. NOTE(review): presumably filled in by the page script — confirm. -->
    <p class="pcIP"></p>
    <div class="content">
        <!-- Filter bar: one text field plus the query / reset buttons. -->
        <div class="container content_width">
            <div class="person_search">
                <div class="search_input">
                    <!-- The visible caption is tied to the input with aria-labelledby, so the field has an
                         accessible name without changing the span element that existing styles may target. -->
                    <div class="input-group mb-3">
                        <span id="Ktext_label">字段描述：</span>
                        <input id="Ktext" type="text" class="form-control" placeholder="字段描述" aria-labelledby="Ktext_label">
                    </div>
                </div>
                <div class="search_input">
                    <button class="btn btn-primary search_btn" type="button" id="search_btn">查询</button>
                    <button class="btn btn-primary search_btn" type="button" id="back_btn">重置</button>
                </div>
            </div>

            <div class="line" style="margin-bottom:0px"></div>
        </div>
        <div class="export">
            <!-- Opens the #pachong dialog through the data-target attribute. -->
            <button id="new_add" type="button" class="btn btn-primary btn-sm" data-toggle="modal"
                    data-target="#pachong">
                <!-- Decorative icon: the adjacent text already names the button, so alt is empty. -->
                <img src="http://hoppinzq.com/spider/img/add_two.png" alt="">
                <span>添加爬虫字段内容</span>
            </button>
            <!-- Dialog for entering one crawler field definition; it is the data-target of the #new_add button. -->
            <div class="modal fade" id="pachong">
                <div class="modal-dialog modal-lg modal_position">
                    <div class="modal-content">
                        <div class="modal-header">
                            <h4 class="modal-title">爬虫字段内容添加</h4>
                            <!-- The × glyph alone is not a usable name for assistive technology. -->
                            <button type="button" class="close close_md" data-dismiss="modal" aria-label="关闭">&times;</button>
                        </div>
                        <div class="modal-body">
                            <!-- Each control takes its accessible name from aria-label, mirroring the caption
                                 in the neighbouring cell, because the captions are plain table cells. -->
                            <table id="xztb" class="table">
                                <tbody>
                                <tr>
                                    <td colspan="4">
                                        <a href="#syxz" class="fxt-switcher-text ms-2 text-dark close_md">不知道这些字段什么意思?戳我。</a>
                                    </td>
                                </tr>
                                <tr>
                                    <!-- The red asterisk uses a span; the deprecated <font> element was removed. -->
                                    <td class="tb_bg"><span style="font-size: 14px; color: red;">*</span>字段描述
                                    </td>
                                    <td><input class="description" type="text" placeholder="请输入字段描述" aria-label="字段描述"></td>
                                    <td class="tb_bg"><span style="font-size: 14px; color: red;">*</span>键值
                                    </td>
                                    <td><input class="key" type="text" placeholder="请输入字段对应键值" aria-label="键值"></td>
                                </tr>
                                <tr>
                                    <td class="tb_bg" style="cursor: help" title="不知道这是什么？戳上面">xpath
                                    </td>
                                    <td><input class="xpath" type="text" placeholder="请输入xpath" aria-label="xpath"></td>
                                    <td class="tb_bg" style="cursor: help" title="不知道这是什么？戳上面">xpath函数</td>
                                    <td>
                                        <select class="form-control select_down xpath_f" style="font-size: 13px; color: #666;" aria-label="xpath函数">
                                            <option></option>
                                            <option>text(0)</option>
                                            <option>allText()</option>
                                            <option>tidyText()</option>
                                            <option>html()</option>
                                            <option>outerHtml()</option>
                                        </select>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="tb_bg" style="cursor: help" title="不知道这是什么？戳上面">css选择器
                                    </td>
                                    <td><input class="cssSelector" type="text" placeholder="请输入css选择器" aria-label="css选择器"></td>
                                    <td class="tb_bg" style="cursor: help" title="不知道这是什么？戳上面">css选择器属性</td>
                                    <td><input class="cssSelectorAttr" type="text" placeholder="请输入css选择器属性" aria-label="css选择器属性"></td>
                                </tr>
                                <tr>
                                    <td class="tb_bg" style="cursor: help" title="不知道这是什么？戳上面">正则表达式
                                    </td>
                                    <td colspan="3"><input class="regexS" type="text" placeholder="请输入正则表达式" aria-label="正则表达式"></td>
                                </tr>
                                <!-- Radio groups: only the default option (否) carries `checked`. Both radios of
                                     each group used to have the attribute; the browser kept the last one, so the
                                     initial selection is unchanged. -->
                                <tr>
                                    <td class="tb_bg">是否多个
                                    </td>
                                    <td>
                                        <div style="display: flex">
                                            <div class="col-lg-6"><input name="group1" type="radio" id="radio_1_y" class="radio-col-primary" style="width: 50% !important;">
                                                <label for="radio_1_y">是</label></div>
                                            <div class="col-lg-6"><input name="group1" type="radio" id="radio_1_n" class="radio-col-primary" style="width: 50% !important;" checked>
                                                <label for="radio_1_n">否</label></div>
                                        </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="tb_bg">是否爬取链接
                                    </td>
                                    <td>
                                        <div style="display: flex">
                                            <div class="col-lg-6"><input name="group2" type="radio" id="radio_2_y" class="radio-col-primary" style="width: 50% !important;">
                                                <label for="radio_2_y">是</label></div>
                                            <div class="col-lg-6"><input name="group2" type="radio" id="radio_2_n" class="radio-col-primary" style="width: 50% !important;" checked>
                                                <label for="radio_2_n">否</label></div>
                                        </div>
                                    </td>
                                    <td class="tb_bg">是否将爬取链接的放入池中
                                    </td>
                                    <td>
                                        <div style="display: flex">
                                            <div class="col-lg-6"><input name="group3" type="radio" id="radio_3_y" class="radio-col-primary" style="width: 50% !important;">
                                                <label for="radio_3_y">是</label></div>
                                            <div class="col-lg-6"><input name="group3" type="radio" id="radio_3_n" class="radio-col-primary" style="width: 50% !important;" checked>
                                                <label for="radio_3_n">否</label></div>
                                        </div>
                                    </td>
                                </tr>

                                </tbody>
                            </table>
                        </div>
                        <div class="modal-footer">
                            <button type="button" class="btn btn-secondary close_md" data-dismiss="modal">关闭</button>
                            <button id="add_btn" type="button" class="btn btn-secondary">确定</button>
                        </div>
                    </div>
                </div>
            </div>
            <!-- Dialog with a URL field and test button, an empty .json output row, and an initially
                 hidden form (.add_pc) for saving the configuration as a new crawler. -->
            <div class="modal fade" id="testModal">
                <div class="modal-dialog modal-lg modal_position">
                    <div class="modal-content">
                        <div class="modal-header">
                            <h4 class="modal-title">测试爬虫</h4>
                            <!-- The × glyph alone is not a usable name for assistive technology. -->
                            <button type="button" class="close close_md" data-dismiss="modal" aria-label="关闭">&times;</button>
                        </div>
                        <div class="modal-body">
                            <div class="row contact-us">
                                <div class="contact-form">
                                    <div class="row">
                                        <div class="col-9">
                                            <div class="form-group">
                                                <!-- A placeholder is not a label, so the field gets an explicit accessible name. -->
                                                <input name="url" id="url" type="text" placeholder="输入测试的网站" aria-label="输入测试的网站" required>
                                            </div>
                                        </div>
                                        <div class="col-3">
                                            <div class="button" style="margin-top: 0px;">
                                                <button type="button" id="test_l" style="height: 55px;background-color: #61b864 !important" class="btn">测试</button>
                                            </div>
                                        </div>
                                    </div>
                                    <button id="new_add_pc" type="button" class="btn btn-primary btn-sm">
                                        <!-- Decorative icon: the adjacent text already names the button. -->
                                        <img src="http://hoppinzq.com/spider/img/add_two.png" alt="">
                                        <span>新增该爬虫配置？</span>
                                    </button>
                                    <!-- Empty in the markup. NOTE(review): presumably receives the test result from the page script — confirm. -->
                                    <div class="row json">

                                    </div>
                                    <div class="row add_pc" style="display:none;">
                                        <table class="table">
                                            <tbody>
                                            <tr>
                                                <!-- The red asterisk uses a span; the deprecated <font> element was removed. -->
                                                <td class="tb_bg"><span style="font-size: 14px; color: red;">*</span>爬虫名称
                                                </td>
                                                <td><input style="width: 100%" class="pc__name" type="text" placeholder="爬虫名称" aria-label="爬虫名称"></td>
                                                <td class="tb_bg" style="cursor: help" title="不知道这是什么？">线程数
                                                </td>
                                                <td><input class="thread" type="number" value="1" aria-label="线程数" disabled></td>
                                            </tr>
                                            <tr>
                                                <td class="tb_bg">爬虫的网页URL
                                                </td>
                                                <td colspan="3"><input style="width: 100%" class="url__" type="text" placeholder="爬虫的网页URL" aria-label="爬虫的网页URL"></td>
                                            </tr>
                                            <tr>
                                                <td class="tb_bg">爬虫描述
                                                </td>
                                                <td colspan="3"><input style="width: 100%" class="desc__" type="text" placeholder="爬虫描述" aria-label="爬虫描述"></td>
                                            </tr>
                                            </tbody>
                                        </table>
                                        <div class="button" style="margin-top: 0px;">
                                            <button type="button" id="add_pc__" style="height: 55px;background-color: #61b864 !important" class="btn">新增</button>
                                        </div>
                                    </div>
                                </div>
                            </div>

                        </div>
                        <div class="modal-footer">
                            <button type="button" class="btn btn-secondary close_md" data-dismiss="modal">关闭</button>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        <!-- Listing of configured fields. The body is empty in the markup.
             NOTE(review): rows are presumably rendered into #show_tbody by the page script — confirm. -->
        <div class="table-wrapper table-responsive">
            <table id="tb" class="table">
                <thead>
                <!-- scope="col" marks each header as applying to its whole column. -->
                <tr>
                    <th scope="col">字段描述</th>
                    <th scope="col">键值</th>
                    <th scope="col">xpath</th>
                    <th scope="col">xpath函数</th>
                    <th scope="col">css选择器</th>
                    <th scope="col">css选择器属性</th>
                    <th scope="col">正则表达式</th>
                    <th scope="col">是否多个</th>
                    <th scope="col">是否爬取链接</th>
                    <th scope="col">是否将爬取链接的放入池中</th>
                    <th scope="col">操作</th>
                </tr>
                </thead>
                <tbody id="show_tbody">

                </tbody>
            </table>
        </div>
        <!-- Button captioned 测试爬虫 ("test crawler").
             NOTE(review): its id is "text", possibly a typo for "test"; check the page script before renaming. -->
        <div class="search_input">
            <button id="text" type="button" class="btn btn-primary">测试爬虫</button>
        </div>
    </div>
</div>
<!-- Help section explaining the configuration fields; #syxz is the anchor linked from the add-field dialog. -->
<div class="container">
    <div class="row">
        <div class="col-lg-12">
            <div class="fxt-column-wrap justify-content-between">
                <div class="fxt-transformX-L-50 fxt-transition-delay-5">
                    <div class="fxt-middle-content">
                        <h1 class="fxt-main-title" id="syxz">爬虫配置须知</h1>
                        <p class="font-blod text-success sqzk" style="cursor:pointer;">[收起/展开]</p>
                        <div class="fxt-switcher-description1">
                            <p class="font-blod">你有三种方式告诉我爬取的内容：</p>
                            <div class="col-12">
                                <p class="text-muted">
                                    1、通过XPath和XPath自定义函数：我们知道html实际是XML，XPath即为XML路径语言（XML Path Language），它是一种用来确定XML文档中某部分位置的语言
                                    不会也没事，因为你可以借助F12的控制台窗口获取到任意dom的XPath。
                                    <!-- NOTE(review): alt text is inferred from the sentence above; confirm it matches the screenshot. -->
                                    <img src="http://hoppinzq.com/spider/img/n2wvBwdcaN.png" alt="通过F12控制台获取dom的XPath的示例截图">
                                </p>
                                <!-- The paragraph is closed before the table: a <p> cannot contain a <table>, so the
                                     parser ended it there anyway and the old trailing </p> produced an empty paragraph. -->
                                <p>由于xpath函数很多，为了降低使用难度，采取了下拉框选择的形式。下面是部分XPath的自定义函数及其描述：</p>
                                <table>
                                    <thead>
                                    <tr>
                                        <th>表达式</th>
                                        <th>描述</th>
                                    </tr>
                                    </thead>
                                    <tbody>
                                    <tr>
                                        <td>text(n)</td>
                                        <td>第n个直接文本子节点，为0表示所有</td>
                                    </tr>
                                    <tr>
                                        <td>tidyText()</td>
                                        <td>所有的直接和间接文本子节点，并将一些标签替换为换行，使纯文本显示更整洁</td>
                                    </tr>
                                    <tr>
                                        <td>html()</td>
                                        <td>内部html，不包括标签的html本身</td>
                                    </tr>
                                    <tr>
                                        <td>outerHtml()</td>
                                        <td>内部html，包括标签的html本身</td>
                                    </tr>
                                    </tbody>
                                </table>

                                <p class="text-muted">
                                    2、通过CSS选择器，其中CSS选择器属性表示CSS选择器选中的dom的属性，可不填。css选择器需要你具备基本的
                                    前端知识，有的时候选择器可能会选中多个dom元素，此情况下请将“是否多个”选择为“是”，否则只会获取选择器选中的
                                    第一个。
                                </p>
                                <p class="text-muted">
                                    3、通过正则表达式，比较有技术含量，不会者或者不熟练者慎用，XPath就完全够用了。正则表达式可以跟上面XPath和CSS选择器一起用，用于对爬取的内容再用正则表达式匹配。
                                    比如下面爬所有媒体内容的正则表达式:
                                    <br>
                                    <!-- Angle brackets in the sample regex are written as entities so they are always
                                         treated as text; the rendered pattern is unchanged. -->
                                    <code>
                                        &lt;(img|video|source)\b[^&gt;]*\b(src)\b\s*=\s*('|")?([^'"&gt;]+(\.jpg|\.gif|\.png|\.svg|\.mp4|\.mp3)\b)[^&gt;]*&gt;
                                    </code>
                                </p>
                            </div>
                            <p class="font-blod">
                                还有两个字段需要你注意：
                            </p>
                            <p class="font-blod">1、是否爬取链接，即爬取你指定dom的href属性，链接分为站内链接（链接跟域名同域的）；
                                站外链接（链接跟域名不一样：如csdn放了一个指向百度的友链）；其他链接（锚链接，软件链接等）。只会爬取站内链接！</p>
                            <p class="font-blod">2、是否将爬取链接的放入池中，即继续爬取该链接内的内容（通过上面配置的方式）。</p>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<div class="container" style="margin-bottom: 30px;margin-top: 30px;">
    <div style="border-bottom: 1px solid #ebebeb;">
        <div class="row mb-n7">
            <!-- Opens a plain-http page on a raw IP address in a new tab; rel="noopener noreferrer"
                 keeps that page from reaching back into this window or receiving the referrer. -->
            <p>→<a href="http://1.15.232.156/wyy/login.html" target="_blank" rel="noopener noreferrer">盗号实战</a>←</p>
        </div>
    </div>
</div>
<!-- Site footer: contact details, friend links, author profiles, open-source projects and the filing notice.
     Every link that opens a new tab carries rel="noopener noreferrer" so the external page gets no handle
     on this window. The one-line list markup is kept so whitespace between items does not change. -->
<footer class="footer">
    <div class="container">
        <div class="row mb-n7 footer-list"><div class="col-lg-3 col-sm-6 mb-7">
            <div class="footer-widget">
                <h4 class="footer-title footer-title-0">联系方式</h4><ul class="footer-des"><li class="footer-menu-items"><i class="lab la-qq"></i><a class="footer-menu-link" href="tencent://Message/?Uin=937040147&amp;websiteName=15028582175&amp;Menu=yes">937040147</a></li><li class="footer-menu-items"><i class="lab la-weixin"></i><a class="footer-menu-link" href="#">HOPPIN_HAZZ</a></li><li class="footer-menu-items"><i class="las la-envelope-open"></i><a class="footer-menu-link" href="mailto:anmiezata@163.com">anmiezata@163.com</a></li><li class="footer-menu-items"><i class="las la-phone-volume"></i><a class="footer-menu-link" href="tel:15028582175">15028582175</a></li></ul>
            </div>
        </div><div class="col-lg-3 col-sm-6 mb-7">
            <div class="footer-widget">
                <h4 class="footer-title footer-title-1">友情链接</h4><ul class="footer-des"><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://www.bilibili.com/">哔哩哔哩</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com">码云Gitee</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://www.csdn.net/">CSDN</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://www.runoob.com/">菜鸟教程</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://leetcode-cn.com/">力扣(leetcode)</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://processon.com/">Process on</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://github.com/">Github</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://www.cnblogs.com/">博客园</a></li></ul>
            </div>
        </div><div class="col-lg-3 col-sm-6 mb-7">
            <div class="footer-widget">
                <h4 class="footer-title footer-title-2">关于我</h4><ul class="footer-des"><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://space.bilibili.com/230296772?spm_id_from=333.1007.0.0">B站：装甲浮士德</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com/hoppin">我的Gitee账号</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://blog.csdn.net/qq_41544289?spm=1000.2115.3001.5343">我的博客</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://github.com/HOPPINAN_HAZZ">Github账号</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://music.163.com/#/user/home?id=309621600">网易云时间</a></li></ul>
            </div>
        </div><div class="col-lg-3 col-sm-6 mb-7">
            <div class="footer-widget">
                <h4 class="footer-title footer-title-3">开源项目</h4><ul class="footer-des"><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com/hoppin/hoppinzq">剑来框架</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com/hoppin/hoppinzq-netease_cloud_music">网易云</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com/hoppin/hoppinzq-jquery-zjax">zjax</a></li><li class="footer-menu-items"><a class="footer-menu-link" target="_blank" rel="noopener noreferrer" href="https://gitee.com/hoppin/hoppinzq-resourcecache">zCache</a></li></ul>
            </div>
        </div></div>
    </div>
    <div class="copy-right bg-dark">
        <div class="container">
            <div class="row">
                <div class="col-12">
                    <p><a target="_blank" rel="noopener noreferrer" href="https://beian.miit.gov.cn/">Copyright © HOPPINZQ.
                        ICP主体备案号:鲁ICP备2021023310号</a></p>
                </div>
            </div>
        </div>
    </div>
</footer>
<!-- Icon-only link: aria-label supplies the accessible name and the icon is hidden from assistive technology. -->
<a href="#" class="scroll-top" aria-label="返回顶部">
    <i class="lni lni-chevron-up" aria-hidden="true"></i>
</a>
<!-- Scripts have no async/defer attributes, so they run in the order listed; keep this order. -->
<!-- NOTE(review): config.js is loaded before jQuery; presumably it only defines configuration values
     and does not use jQuery — confirm. -->
<script src="/static/js/config.js"></script>
<!-- jQuery is loaded before the Bootstrap, bootbox and other plugin scripts below. -->
<script src="http://hoppinzq.com/spider/js/jquery.min.js"></script>
<script src="http://hoppinzq.com/spider/js/bootstrap.min.js"></script>
<script src="http://hoppinzq.com/spider/js/bootbox.min.js"></script>
<!-- NOTE(review): this script and jquery.json.js below come from www.json.cn, a different host from the
     others, and have no integrity attribute — consider self-hosting them or adding SRI. -->
<script src="https://www.json.cn/static/kj/js/bignumber.min.js"></script>
<!-- NOTE(review): the hoppinzq.com scripts use plain http; browsers block http scripts on a page served
     over https — confirm how this page is served. -->
<script src="http://hoppinzq.com/static/js/plugins/buttonLoading.js"></script>
<script src="https://www.json.cn/static/202010/js/jquery.json.js"></script>
<!-- Loaded last. NOTE(review): presumably holds this page's own logic — confirm. -->
<script src="/static/js/mejs.js"></script>

</body>
</html>